* Start from an empty session before anything is loaded.
clear all

* Root folder of the replication package. Every other location below is
* derived from it, so this is the only line to edit when the package moves.
global path "~/Dropbox/IT_Revolution/Replication_package/JPE submission"

* Sub-folders for raw inputs, cleaned/intermediate data, and final output.
global path_rawdata   "${path}/Raw_data"
global path_cleandata "${path}/Clean_data"
global path_output    "${path}/Output"

*** Step 1: Build employment totals by occupation, year, and age
********************************************************************************
* 1.1: ICT exposure (1980-2010 cleaned file)
use "$path_cleandata/data_clean_1980_2010.dta", clear

* Sample restriction: only sex == 1 is imposed here. The intended sample is
* described as males aged 16-64 in the labor force, so the age and labor-force
* restrictions are presumably applied when the cleaned file is built
* -- TODO confirm against the cleaning script.
keep if sex == 1

* Attach the occupation-level exposure measures; retain matched records only.
merge m:1 occ1990dd using "$path_cleandata/temp_high_exp_late.dta", keep(match)
drop _merge

* Weighted employment by occupation x exposure measures x year x age.
* age_group is a plain copy of age.
generate age_group = age
collapse (sum) emp [aweight = perwt], ///
    by(occ1990dd exposure_ict high_exp75 year age_group) fast

save "$path_cleandata/temp_data_late_cohort.dta", replace

* 1.2: Manufacturing exposure (1900-1940 cleaned file)
use "$path_cleandata/data_clean_1900_1940.dta", clear

* Sample restriction: only sex == 1 is imposed here. The intended sample is
* described as males aged 16-64 in the labor force, so the age and labor-force
* restrictions are presumably applied when the cleaned file is built
* -- TODO confirm against the cleaning script.
keep if sex == 1

* Attach the occupation-level exposure measures; retain matched records only.
merge m:1 occ using "$path_cleandata/temp_high_exp_early.dta", keep(match)
drop _merge

* Weighted employment by occupation x exposure measures x year x age.
* age_group is a plain copy of age.
generate age_group = age
collapse (sum) emp [aweight = perwt], ///
    by(occ exposure_manuf high_exp75 year age_group) fast

save "$path_cleandata/temp_data_early_cohort.dta", replace

* Step 2: Variance decomposition of change in employment share associated with year fixed effects
********************************************************************************
* 2.1: Manufacturing exposure
* Produces a one-row dataset (period = 1) holding 1 - R^2 and the residual
* standard deviation from two regressions of cohort-level log changes in the
* highly-exposed employment share on year dummies: pooled OLS and cohort FE.
use "$path_cleandata/temp_data_early_cohort.dta", clear

* Employment by exposure indicator, year, and age
generate indH = high_exp75
keep if inrange(year, 1900, 1940)

collapse (sum) emp, by(indH year age_group) fast

* Share of each year x age cell's employment that falls in each exposure group
egen tot_emp_cohort = total(emp), by(year age_group)
generate share_emp = emp/tot_emp_cohort
generate cohort = year - age_group

* Keep the highly exposed group only. "keep if indH == 1" is used instead of
* "drop if indH == 0" so that cells with a missing exposure indicator cannot
* remain in the panel (Stata's missing value is not equal to 0).
keep if indH == 1
drop if age_group == 15

* Define panel of cohorts observed every 10 years
xtset cohort year, delta(10)
generate dshare_emp = log(share_emp/l.share_emp)

* Baseline version: year fixed effects only
regress dshare_emp i.year
predict dpred_res_year, residuals

generate r2 = e(r2)
generate one_minus_r2 = 1 - r2

summarize dpred_res_year
generate sd_res = r(sd)

* Extended version with cohort time trends: cohort fixed effects in the
* log-change equation (within estimator)
xtreg dshare_emp i.year, fe

generate r2_fe = e(r2_w)
generate one_minus_r2_fe = 1 - r2_fe
generate sd_res_fe = e(sigma_e)
generate n = e(N)
generate nc = e(N_g)

* Generate output: all retained variables are constants, so dropping
* duplicates leaves a single summary row
keep one_minus* sd_* n nc
duplicates drop
generate period = 1

save "$path_cleandata/temp_early_manuf.dta", replace

* 2.2: ICT exposure
* Produces a one-row dataset (period = 2) holding 1 - R^2 and the residual
* standard deviation from two regressions of cohort-level log changes in the
* highly-exposed employment share on year dummies: pooled OLS and cohort FE.
use "$path_cleandata/temp_data_late_cohort.dta", clear

* Employment by exposure indicator, year, and age
generate indH = high_exp75
* Recode 2018 to 2020; presumably so that it lines up with the 10-year
* spacing declared in xtset below -- confirm against the data construction.
replace year = 2020 if year == 2018
keep if inrange(year, 1980, 2020)

collapse (sum) emp, by(indH year age_group) fast

* Share of each year x age cell's employment that falls in each exposure group
egen tot_emp_cohort = total(emp), by(year age_group)
generate share_emp = emp/tot_emp_cohort
generate cohort = year - age_group

* Keep the highly exposed group only. "keep if indH == 1" is used instead of
* "drop if indH == 0" so that cells with a missing exposure indicator cannot
* remain in the panel (Stata's missing value is not equal to 0).
keep if indH == 1

* Define panel of cohorts observed every 10 years
xtset cohort year, delta(10)
generate dshare_emp = log(share_emp/l.share_emp)

* Baseline version: year fixed effects only
regress dshare_emp i.year
predict dpred_res_year, residuals

generate r2 = e(r2)
generate one_minus_r2 = 1 - r2

summarize dpred_res_year
generate sd_res = r(sd)

* Extended version with cohort time trends: cohort fixed effects in the
* log-change equation (within estimator)
xtreg dshare_emp i.year, fe

generate r2_fe = e(r2_w)
generate one_minus_r2_fe = 1 - r2_fe
generate sd_res_fe = e(sigma_e)
generate n = e(N)
generate nc = e(N_g)

* Generate output: all retained variables are constants, so dropping
* duplicates leaves a single summary row
keep one_minus* sd_* n nc
duplicates drop
generate period = 2

save "$path_cleandata/temp_late_ict.dta", replace

* Step 3: Output table
********************************************************************************
* Stack the two one-row summaries (period 2 = ICT, period 1 = manufacturing)
* and write TabA3_decomposition.tex as three appended LaTeX fragments.
use "$path_cleandata/temp_late_ict.dta", clear

append using "$path_cleandata/temp_early_manuf.dta"

order period

* Post each statistic pair as a stored estimate, one set per period.
* Suffixes: _A = year effects only, _B = with cohort fixed effects,
* _obs = observation counts.
local label1 "pre"
local label2 "post"
foreach p of numlist 2 1 {
    local tag "`label`p''"

    estpost tabstat one_minus_r2 sd_res if period == `p', columns(statistics)
    estimates store decomposition_`tag'_A

    estpost tabstat one_minus_r2_fe sd_res_fe if period == `p', columns(statistics)
    estimates store decomposition_`tag'_B

    estpost tabstat n nc if period == `p', columns(statistics)
    estimates store decomposition_`tag'_obs
}

* Panel A (creates/overwrites the .tex file)
esttab decomposition_post_A decomposition_pre_A ///
    using "$path_output/TabA3_decomposition.tex", ///
    main(mean %9.3f) extracols(2) fragment booktabs style(tex) ///
    posthead("\multicolumn{4}{l}{Panel A: Importance of cohort-specific components} \\[2pt]") ///
    varlabels(one_minus_r2 "\hspace{3mm} \$ 1 - R^2\$" sd_res "\hspace{3mm} St. Dev. of \$\epsilon _{c,t}\$") ///
    collabels(none) mlabels(none) onecell nonumbers noobs nonote substitute(\_ _) replace

* Panel B (appended). The leading "[4pt]" in posthead and the "[4pt]" postfoot
* attach to the row break that ends the preceding table row.
esttab decomposition_post_B decomposition_pre_B ///
    using "$path_output/TabA3_decomposition.tex", ///
    main(mean %9.3f) extracols(2) fragment booktabs style(tex) ///
    posthead("[4pt] \multicolumn{4}{l}{Panel B: Importance of cohort-specific components (w/ cohort linear trend controls)} \\[2pt]") ///
    varlabels(one_minus_r2_fe "\hspace{3mm} \$ 1 - R^2\$" sd_res_fe "\hspace{3mm} St. Dev. of \$\epsilon _{c,t}\$") postfoot("[4pt]") ///
    collabels(none) mlabels(none) onecell nonumbers noobs nonote substitute(\_ _) append

* Observation counts (appended), framed by horizontal rules
esttab decomposition_post_obs decomposition_pre_obs ///
    using "$path_output/TabA3_decomposition.tex", ///
    main(mean %9.0f) extracols(2) fragment booktabs style(tex) ///
    posthead("\hline") ///
    varlabels(n "Cohort-period obs." nc "Cohort obs.") ///
    postfoot("\hline") ///
    collabels(none) mlabels(none) onecell nonumbers noobs nonote substitute(\_ _) append

* Remove the intermediate datasets created in Steps 1 and 2
foreach f in temp_late_ict temp_early_manuf temp_data_early_cohort temp_data_late_cohort {
    erase "$path_cleandata/`f'.dta"
}




